# -*- coding:utf8 -*-
# !/usr/bin/env python

"""
#全国企业信用信息公示系统（浙江）
"""

import sys
import traceback
import time
import re
from scpy.logger import get_logger
import requests
from bs4 import BeautifulSoup
# from zj_kill_captcha import zj_kill_captcha as kill_captcha
from zj_kill_captcha_2 import kill_captcha
import copy
import json
import random
import StringIO

import zj_table as table
import zj_trans_dict as TR
import sd_template_dict as TE
import sd_format as FO
import zj_check_captcha as CC
import zj_post_img


# import request_util
# ua = random.choice(request_util.USER_AGENTS)
# ua = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.87 Safari/537.36"
ua = 'Mozilla/5.0 (Windows; U; Windows NT 6.1; zh-CN; rv:1.9.2.15) Gecko/20110303 Firefox/3.6.15'
SLEEP_TIME = 10

reload(sys)
sys.setdefaultencoding('utf8')
logger = get_logger(__file__)


def download_captcha_kill(companyName):
    """Run one captcha round-trip and search the site for *companyName*.

    Flow: fetch the index page to obtain a JSESSIONID cookie, download
    the captcha image, have the external `kill_captcha` service crack it,
    validate the code against the site, then post the actual search.

    Returns (protocol consumed by search2):
        str URL       -- relative URL of the first matching company.
        None          -- site reports no matching company.
        ''            -- empty image / quality-check failure (caller retries).
        're download' -- image judged too complex, fetch a new one.
        'exception'   -- captcha service returned garbage (API error stat).
        'error'       -- site rejected the cracked code (API error stat).

    Raises:
        Exception: no session cookie, captcha service blew up, or the
        site flagged this IP/UA as banned.
    """
    # saic_index = "http://gsxt.saic.gov.cn/"
    index_url = "http://gsxt.zjaic.gov.cn/zhejiang.jsp"
    # search_url = "http://gsxt.zjaic.gov.cn/search/doEnGeneralQueryPage.do"
    # search_url = "http://gsxt.zjaic.gov.cn/search/doEnGeneralQueryPage.do#"

    # print ua
    req = requests.session()
    index_res = req.get(index_url, headers={'User-Agent': ua, })
    # NOTE(review): the bare .items() call below is a no-op left in place.
    index_res.cookies.items()
    cookie = index_res.cookies.items()
    if not cookie or not cookie[0] or len(cookie[0]) < 1:
        raise Exception("cookie获取失败")
    # The session id is the value of the first (name, value) cookie pair.
    jsessionid = index_res.cookies.items()[0][1]

    req.get(index_url)
    # All subsequent requests must carry the JSESSIONID both as a cookie
    # header and embedded in the URL (";jsessionid=...") below.
    req.headers = {
        'Host': 'gsxt.zjaic.gov.cn',
        'Referer': 'http://gsxt.zjaic.gov.cn/zhejiang.jsp',
        'User-Agent': ua,
        'Cookie': "JSESSIONID=%s" % jsessionid,
    }
    img_url = 'http://gsxt.zjaic.gov.cn/common/captcha/doReadKaptcha.do;jsessionid=%s' % jsessionid
    img = req.get(img_url).content
    if not img:
        logger.error("从网站下载验证码为空！重复下载！")
        return ''

    # Pre-screen the image quality to decide whether to re-download it
    # instead of wasting a call to the cracking API.
    img_sio_file = StringIO.StringIO(img)
    try:
        img_reload = CC.ocr_qualify(img_sio_file)
    except:
        time.sleep(SLEEP_TIME)
        return ''
    if not img_reload:
        logger.error("图片复杂,重复下载！")
        return 're download'

    logger.info("len(img)=%s" % len(img))

    # with open('./zj.jpg', 'wb') as fp:
    #     fp.write(img)
    # # print img
    # res_code = raw_input('input=')

    try:
        res_code = kill_captcha(img, 'zj', 'jpg')
        # print 'res code: ', res_code
    except Exception, e:
        logger.error("破解验证码的服务出现异常")
        logger.error(e)
        raise e
    # Implausible results ("None"/"wrong"/overlong) count as API failures.
    if not res_code or len(res_code) > 100 or str(res_code) in ['None', 'wrong']:
        logger.info('验证码为:%s' % res_code)
        logger.error("破解验证码的服务出现异常,可能是下载的验证码错误，也可能破解服务出现异常！")
        return 'exception'  # used to measure the API error rate
        # return ''   # return empty string instead to force a retry

    check_data = {
        'verifyCode': res_code,
        'name': companyName,
    }
    # Step 1: let the site confirm the captcha code before searching.
    check_1_url = 'http://gsxt.zjaic.gov.cn/search/doValidatorVerifyCode.do;jsessionid=%s' % jsessionid
    check_res = req.post(url=check_1_url, data=check_data)
    logger.info("%s" % check_res)
    if check_res.status_code == 200 and check_res.content \
            and json.loads(check_res.content).get("nameResponse", {}).get("message", "") == "true":
        # zj_post_img.post_img(img, True, res_code)
        pass
    else:
        logger.info("网站返回,验证码错误！")
        logger.info(check_res.content)
        # zj_post_img.post_img(img, False, res_code)
        return "error"  # used to measure the API error rate
        # return ""       # return empty string instead to force a retry

    # Step 2: the actual search request.
    check_url = 'http://gsxt.zjaic.gov.cn/search/doGetAppSearchResult.do;jsessionid=%s' % jsessionid

    com_res = req.post(url=check_url, data=check_data).content
    # print com_res

    if "异常警告" in com_res:
        logger.info("IP 或 UA 被封了")
        raise Exception("IP or UA is barred!")

    if "您搜索的条件无查询结果" in com_res:
        logger.info("公司不存在,关键词为:%s" % companyName)
        return None
    com_url_list = re.findall('''href="(/appbasicinfo/doViewAppBasicInfoByLog.*?)"''', com_res)
    if not com_url_list:
        logger.info("公司不存在,关键词为:%s" % companyName)
        return None
    else:
        logger.info("公司存在,搜索到%s条" % len(com_url_list))
        logger.info(json.dumps(com_url_list, ensure_ascii=False))
        # Only the first hit is followed.
        return com_url_list[0]


def get_company_info(com_info):
    """Download every detail page for one company.

    Args:
        com_info: relative URL returned by download_captcha_kill(); it
            must contain a "corpid=..." query parameter.

    Returns:
        dict with keys "province"/"type"/"keyword"/... where "html" maps
        section names to raw HTML (or lists of paged HTML) and
        "yearList" holds one raw-response dict per annual report.

    Raises:
        Exception: com_info is empty.
        IndexError: com_info has no corpid parameter (unguarded findall).
    """
    if not com_info:
        raise Exception("com_list 错误")
    raw_dict = {
        "province": "zj",
        "type": "1",
        "html": "",
        "yearList": [],
        "keyword": "",
        "companyName": "",
        "json": "",
    }
    com_req = requests.session()
    com_req.headers = {
        'Host': 'gsxt.zjaic.gov.cn',
        'Referer': 'http://gsxt.zjaic.gov.cn/zhejiang.jsp',
        'User-Agent': ua,
    }

    corpid = re.findall('''corpid=(.*)''', com_info)[0]
    base_html_dict = {}
    root_url = "http://gsxt.zjaic.gov.cn"

    # Registration-number page
    com_url_reg = root_url + '/appbasicinfo/doViewAppBasicInfo.do?corpid=' + corpid
    raw_reg = com_req.get(com_url_reg).content
    base_html_dict["reg"] = raw_reg

    # Basic information page
    com_url = root_url + "/appbasicinfo/doReadAppBasicInfo.do?corpid=" + corpid
    raw_base = com_req.get(com_url).content
    base_html_dict["base"] = raw_base

    # Paging for change-record / shareholder tables: instead of walking
    # the pages one by one, request page 1 with pageSize = 5 * page_count
    # so the single response carries every row.
    table_s = BeautifulSoup(raw_base, "html5lib").find_all("table", attrs={"class": "detailsList"})
    alter_more_list = []
    share_more_list = []
    for item in table_s:
        # print item
        item = str(item)
        page_num = re.findall("共(\d+)页", item)
        if not page_num or int(page_num[0]) <= 1:
            continue
        else:
            page_num = int(page_num[0])

        if "变更信息" in item:
            # for i in range(2, page_num+1):
            #     logger.info("获取翻页信息:%s" % page_num)
            #     req_alter_data = {
            #         "checkAlterPagination.currentPage": str(i),
            #         "checkAlterPagination.pageSize": "5",
            #     }
            #     req_alter_data = {"checkAlterPagination.currentPage": str(i), "checkAlterPagination.pageSize": "5", }
            #     alter_more_res = com_req.post(com_url, data=req_alter_data).content
            #     alter_more_list.append(alter_more_res)

            logger.info("获取翻页信息:%s" % page_num)
            req_alter_data = {
                "checkAlterPagination.currentPage": "1",
                "checkAlterPagination.pageSize": 5 * int(page_num),
            }
            alter_more_res = com_req.post(com_url, data=req_alter_data).content
            alter_more_list.append(alter_more_res)

        elif "股东信息" in item:
            # for i in range(2, page_num+1):
            #     logger.info("获取翻页信息:%s" % page_num)
            #     req_share_data = {
            #         ".currentPage": str(i),
            #         ".pageSize": "5",
            #     }
            #     share_more_res = com_req.post(com_url, data=req_share_data).content
            #     share_more_list.append(share_more_res)
            #

            logger.info("获取翻页信息:%s" % page_num)
            req_share_data = {
                "entInvestorPagination.currentPage": "1",
                "entInvestorPagination.pageSize": 5 * int(page_num),
            }
            share_more_res = com_req.post(com_url, data=req_share_data).content
            share_more_list.append(share_more_res)

    base_html_dict["alter_more"] = alter_more_list
    base_html_dict["share_more"] = share_more_list

    # Filing-information page
    filinginfo_url = root_url + "/filinginfo/doViewFilingInfo.do?corpid=" + corpid
    base_html_dict["filinginfo"] = raw_filinginfo = com_req.get(filinginfo_url).content

    # Same whole-table paging trick for key-personnel / branch tables.
    table_s = BeautifulSoup(raw_filinginfo, "html5lib").find_all("table", attrs={"class": "detailsList"})
    people_more_list = []
    branch_more_list = []
    for item in table_s:
        # print item
        item = str(item)
        page_num = re.findall("共(\d+)页", item)
        if not page_num or int(page_num[0]) <= 1:
            continue
        else:
            page_num = int(page_num[0])
        if "主要人员信息" in item:
            # for i in range(2, page_num+1):
            logger.info("获取翻页信息:%s" % page_num)
            req_people_data = {
                "entMemberPagination.currentPage": "1",
                "entMemberPagination.pageSize": 5 * int(page_num),
            }
            people_more_res = com_req.post(filinginfo_url, data=req_people_data).content
            people_more_list.append(people_more_res)

        elif "分支机构信息" in item:
            # for i in range(2, page_num+1):
            logger.info("获取翻页信息:%s" % page_num)
            req_branch_data = {
                "branchInfoPagination.currentPage": "1",
                "branchInfoPagination.pageSize": 5 * int(page_num),
            }
            branch_more_res = com_req.post(filinginfo_url, data=req_branch_data).content
            branch_more_list.append(branch_more_res)

    base_html_dict["people_more"] = people_more_list
    base_html_dict["branch_more"] = branch_more_list

    # Equity pledge
    equityall_url = root_url + "/equityall/doReadEquityAllListFromPV.do?corpid=" + corpid
    base_html_dict["equityall"] = com_req.get(equityall_url).content

    # Chattel mortgage (empty on this site)
    # NOTE(review): the bare string below is a no-op placeholder for the
    # endpoint, deliberately not requested.
    "/dcdyapplyinfo/doReadDcdyApplyinfoList.do?regNo=&uniSCID="

    # Administrative penalties
    punishment_url = root_url + "/punishment/doViewPunishmentFromPV.do?corpid=" + corpid
    base_html_dict["punishment"] = com_req.get(punishment_url).content

    # Abnormal-operation list
    catalogapply_url = root_url + "/catalogapply/doReadCatalogApplyList.do?corpid=" + corpid
    base_html_dict["catalogapply"] = com_req.get(catalogapply_url).content

    # Serious-violation blacklist
    blacklist_url = root_url + "/blacklist/doViewBlackListInfo.do?corpid=" + corpid
    base_html_dict["blacklist"] = com_req.get(blacklist_url).content

    # Spot checks
    pubcheckresult_url = root_url + "/pubcheckresult/doViewPubCheckResultList.do?corpid=" + corpid
    base_html_dict["pubcheckresult"] = com_req.get(pubcheckresult_url).content

    # Annual reports: the index page links to a list page, which links to
    # one report page per year.
    year_info_list_1_url = root_url + "/annualreport/doViewAnnualReportIndex.do?corpid=" + corpid
    year_info_list_1_res = com_req.get(year_info_list_1_url).content

    year_info_list_2_url = re.findall('''(/pubreportinfo/doReadPubReportInfoList.*?)"''', year_info_list_1_res)
    if year_info_list_2_url:
        year_info_list_2_url = root_url + year_info_list_2_url[0]
        year_info_list_2_res = com_req.get(year_info_list_2_url).content
        year_url_list = re.findall('''<a href="(/pubreportinfo/doReadPubReportInfo.*?)"''', year_info_list_2_res)
    else:
        year_url_list = []

    raw_year_report_list = []

    for item in year_url_list:
        raw_year_report_dict = {}
        # Each section is stored as a list so paged responses would fit.
        raw_year_web_list = []
        raw_year_share_list = []
        raw_year_invest_list = []
        raw_year_guarantee_list = []
        raw_year_stock_list = []
        raw_year_mod_list = []

        year = re.findall("year=(.*)", item)
        year = year[0] if year else ""
        raw_year_report_dict["year"] = year

        # Report base page
        year_res = com_req.get(root_url + item).content
        raw_year_report_dict["base"] = year_res

        # The report number is embedded as JSON inside the HTML page.
        xxxx_no = re.findall('''"webReportNo": "(.*?)",''', year_res)

        if not xxxx_no or not year:
            continue

        xxxx_no = xxxx_no[0]
        # Website info (JSON endpoint; "_id" is just a cache-buster)
        year_web_url = root_url + '/pubreportbusiweb/doReadPubReportBusiWebListJSON.do?_id=doReadPubReportBusiWeb%s' % int(
            time.time() * 1000)

        req_web_data = {
            "webReportNo": xxxx_no,
            "corpid": corpid,
            "year": year,
            "pagination.currentPage": "1",
            "pagination.pageSize": "500",
        }
        raw_year_web_res = com_req.post(year_web_url, data=req_web_data).content
        raw_year_web_list.append(raw_year_web_res)
        raw_year_report_dict["web"] = raw_year_web_list

        # Shareholder & contribution info
        req_share_data = {
            "conReportNo": xxxx_no,
            "corpid": corpid,
            "year": year,
            "pagination.currentPage": "1",
            "pagination.pageSize": "500",
        }
        year_share_url = root_url + '/pubreportconinfo/doReadPubReportConInfoListJSON.do?_id=doReadPubReportConInfo%s' % int(
            time.time() * 1000)
        raw_year_share_res = com_req.post(year_share_url, data=req_share_data).content
        raw_year_share_list.append(raw_year_share_res)
        raw_year_report_dict["share"] = raw_year_share_list

        # Outbound investment info
        req_invest_data = {
            "investReportNo": xxxx_no,
            "corpid": corpid,
            "year": year,
            "pagination.currentPage": "1",
            "pagination.pageSize": "500",
        }
        year_invest_url = root_url + '/pubreportinvestinfo/doReadPubReportInvestInfoListJSON.do?_id=doReadPubReportConInfo%s' % int(
            time.time() * 1000)
        raw_year_invest_res = com_req.post(year_invest_url, data=req_invest_data).content
        raw_year_invest_list.append(raw_year_invest_res)
        raw_year_report_dict["invest"] = raw_year_invest_list

        # External guarantee info
        req_guarantee_data = {
            "guarReportNo": xxxx_no,
            "corpid": corpid,
            "year": year,
            "guarIsp": "1",
            "pagination.currentPage": "1",
            "pagination.pageSize": "500",
        }
        year_guarantee_url = root_url + '/pubreportguaranteeinfo/doReadPubReportGuaranteeInfoListJSON.do?_id=doReadPubReportConInfo%s' % int(
            time.time() * 1000)
        raw_year_guarantee_res = com_req.post(year_guarantee_url, data=req_guarantee_data).content
        raw_year_guarantee_list.append(raw_year_guarantee_res)
        raw_year_report_dict["guarantee"] = raw_year_guarantee_list

        # Equity-change info
        req_stock_data = {
            "stockReportNo": xxxx_no,
            "corpid": corpid,
            "year": year,
            "pagination.currentPage": "1",
            "pagination.pageSize": "500",
        }
        year_stock_url = root_url + '/pubreportstockinfo/doReadPubReportStockInfoListJSON.do?_id=doReadPubReportConInfo%s' % int(
            time.time() * 1000)
        raw_year_stock_res = com_req.post(year_stock_url, data=req_stock_data).content
        raw_year_stock_list.append(raw_year_stock_res)
        raw_year_report_dict["stock"] = raw_year_stock_list

        # Modification history
        req_mod_data = {
            "modReportNo": xxxx_no,
            "corpid": corpid,
            "year": year,
            "modType": "1",
            "pagination.currentPage": "1",
            "pagination.pageSize": "500",
        }
        year_stock_url = root_url + '/pubreportmodhis/doReadPubReportModHisJSON.do?_id=doReadPubReportModHis%s' % int(
            time.time() * 1000)
        raw_year_mod_res = com_req.post(year_stock_url, data=req_mod_data).content
        raw_year_mod_list.append(raw_year_mod_res)
        raw_year_report_dict["modify"] = raw_year_mod_list

        # raw_year_report_dict["html"] = year_res

        raw_year_report_list.append(raw_year_report_dict)

    raw_dict["html"] = base_html_dict
    raw_dict["yearList"] = raw_year_report_list

    return raw_dict


def parse_reg_no(reg_no_html):
    """Pull the registration number out of the "注册号：...&nbsp;" HTML snippet.

    Returns the first match, or "" when the input is empty or contains
    no registration number.
    """
    if not reg_no_html:
        return ""
    match = re.search('注册号：(.*?)&nbsp;', str(reg_no_html))
    return match.group(1) if match else ""


def extract_base_info(raw_dict):
    """Parse the raw HTML bundle from get_company_info() into a result dict.

    Args:
        raw_dict: dict produced by get_company_info(); its "html" entry
            maps section names ("base", "reg", "filinginfo", paged lists
            "alter_more"/"share_more"/"people_more"/"branch_more", ...)
            to raw HTML.

    Returns:
        dict based on the TE.void_base_dict template with basicList,
        shareHolderList, alterList, personList, filiationList,
        liquidationList, abnormalOperation and checkMessage filled in.

    Raises:
        Exception: raw_dict is empty, has no "html" entry, or the base
        page contains no table with id="baseinfo".
    """
    if not raw_dict:
        raise Exception("raw_dict 错误")

    raw_html = raw_dict.get("html", {})
    if not raw_html:
        raise Exception("raw_dict 错误")

    # Basic-information table
    raw_base = raw_html.get("base")
    base_soup = BeautifulSoup(raw_base, "html5lib")
    base_table = base_soup.find_all("table", attrs={"id": "baseinfo"})
    if not base_table:
        raise Exception()

    res_dict = copy.deepcopy(TE.void_base_dict)

    res_dict["basicList"] = table.index("基本信息", str(base_table[0]))
    # Registration number vs. 18-digit unified social-credit identifier.
    # Guarded so an empty/failed parse no longer raises IndexError.
    basic_list = res_dict.get("basicList") or []
    a_raw_reg = basic_list[0].get("regNo", "") if basic_list else ''
    if a_raw_reg and len(a_raw_reg) == 18:
        basic_list[0]["SocialCreditIdentifier"] = a_raw_reg
    raw_reg_html = raw_html.get("reg", "")
    if raw_reg_html and basic_list:
        basic_list[0]["regNo"] = parse_reg_no(raw_reg_html)

    res_dict["province"] = "zj"

    # Shareholder info -- the section heading varies by company type.
    share_table = table.table_clean(str(raw_base), "股东信息") or (
        table.table_clean(str(raw_base), "投资人信息") or table.table_clean(str(raw_base), "股东（发起人）信息"))
    res_dict["shareHolderList"] = (table.index("股东信息", share_table) or []) if share_table else []

    # Change records
    raw_alter_html = table.table_clean(str(raw_base), "变更信息")
    res_dict["alterList"] = (table.index("变更信息", raw_alter_html) or []) if raw_alter_html else []

    # Filing page
    raw_filinginfo = raw_html.get("filinginfo", "")

    # Key personnel (individual businesses list family members instead)
    raw_person_html = table.table_clean(raw_filinginfo, "主要人员信息") or table.table_clean(raw_filinginfo, "家庭成员信息")
    res_dict["personList"] = (table.index("主要人员信息", raw_person_html) or []) if raw_person_html else []

    # Branches
    raw_branch_html = table.table_clean(raw_filinginfo, "分支机构信息")
    res_dict["filiationList"] = (table.index("分支机构信息", raw_branch_html) or []) if raw_branch_html else []

    # Liquidation
    raw_liquidation_html = table.table_clean(raw_filinginfo, "清算信息")
    res_dict["liquidationList"] = (table.index("清算信息", raw_liquidation_html) or []) if raw_liquidation_html else []

    # Abnormal-operation list
    raw_catalogapply = raw_html.get("catalogapply", [])
    for a_table in BeautifulSoup(raw_catalogapply, "html5lib").find_all("table", attrs={"class": "detailsList"}):
        a_table = str(a_table)
        if "经营异常信息" in a_table:
            res_dict["abnormalOperation"] = table.index("经营异常信息", a_table)

    # Spot checks
    raw_pubcheckresult = raw_html.get("pubcheckresult", "")
    for a_table in BeautifulSoup(raw_pubcheckresult, "html5lib").find_all("table", attrs={"class": "detailsList"}):
        a_table = str(a_table)
        if "抽查检查信息" in a_table:
            res_dict["checkMessage"] = table.index("抽查检查信息", a_table)

    # Paged change records: the paged response holds ALL rows, so it
    # replaces the first-page-only alterList.
    raw_alter_more = raw_html.get("alter_more", [])
    for item in raw_alter_more:
        for a_table in BeautifulSoup(item, "html5lib").find_all("table", attrs={"class": "detailsList"}):
            a_table = str(a_table)
            if "变更信息" in a_table:
                res_dict["alterList"] = table.index("变更信息", a_table)

    # Paged shareholders.
    # BUG FIX: these rows were previously stored under "sharesFrostList"
    # (equity freeze); they are shareholder rows and must replace
    # shareHolderList, mirroring the alter/person/branch handling.
    raw_share_more = raw_html.get("share_more", [])
    for item in raw_share_more:
        for a_table in BeautifulSoup(item, "html5lib").find_all("table", attrs={"class": "detailsList"}):
            a_table = str(a_table)
            if "股东信息" in a_table:
                res_dict["shareHolderList"] = table.index("股东信息", a_table)

    # Paged key personnel
    raw_people_more = raw_html.get("people_more", [])
    for item in raw_people_more:
        for a_table in BeautifulSoup(item, "html5lib").find_all("table", attrs={"class": "detailsList"}):
            a_table = str(a_table)
            if "主要人员信息" in a_table or "参加经营的家庭成员姓名" in a_table:
                res_dict["personList"] = table.index("主要人员信息", a_table)

    # Paged branches
    raw_branch_more = raw_html.get("branch_more", [])
    for item in raw_branch_more:
        for a_table in BeautifulSoup(item, "html5lib").find_all("table", attrs={"class": "detailsList"}):
            a_table = str(a_table)
            if "分支机构信息" in a_table:
                res_dict["filiationList"] = table.index("分支机构信息", a_table)

    return res_dict


def num_to_time(num):
    """Format a millisecond epoch timestamp as "YYYY-mm-dd HH:MM:SS".

    Anything that does not look like a millisecond timestamp (falsy,
    non-int, or fewer than 10 digits) yields "".
    """
    if not (num and isinstance(num, int) and len(str(num)) > 9):
        return ""
    return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(num / 1000))


def num_str_to_type(num_str):
    """Translate investment-form digit codes into their Chinese labels.

    The annual-report JSON encodes contribution forms as digit strings
    (possibly comma separated, e.g. "1,7"); every known digit is replaced
    by its label, unknown characters are passed through unchanged.

    Args:
        num_str: raw code string from the report JSON.

    Returns:
        The translated string, or "" when num_str is empty or not a string.
    """
    # `basestring` covers both str and unicode on Python 2; fall back to
    # str so the helper also works if the module is ever run on Python 3.
    try:
        _string_types = basestring
    except NameError:
        _string_types = str
    if not num_str or not isinstance(num_str, _string_types):
        return ""
    num_str_trans_dict = {
        "1": "货币",
        "2": "实物",
        "3": "知识产权",
        "4": "债权",
        "6": "土地使用权",
        "7": "股权",
        "9": "其他",
    }
    # The labels contain no digits, so the replacements are independent
    # and dict iteration order does not matter.
    for key, value in num_str_trans_dict.items():
        num_str = num_str.replace(key, value)
    return num_str


def extract_year_info(raw_dict):
    """Parse the annual-report raw responses into a list of year dicts.

    Args:
        raw_dict: dict produced by get_company_info(); its "yearList"
            entry holds one dict per report year with raw HTML ("base")
            and lists of raw JSON strings ("web", "share", "invest",
            "guarantee", "stock", "modify").

    Returns:
        list of dicts based on the TE.void_year_dict template, one per
        report year.

    Raises:
        Exception: raw_dict is empty.
    """
    if not raw_dict:
        raise Exception("raw_dict 错误")

    raw_year_list = raw_dict.get("yearList", [])

    res_year_list = []

    for a_raw_year_item in raw_year_list:
        res_year_dict = copy.deepcopy(TE.void_year_dict)
        year = a_raw_year_item.get("year", "")
        res_year_dict["year"] = year

        # Company basics from the report HTML
        raw_year_base_html = a_raw_year_item.get("base", "")
        raw_year_base_table_s = BeautifulSoup(raw_year_base_html, "html5lib").find_all("table",
                                                                                       attrs={"class": "detailsList"})
        for a_table in raw_year_base_table_s:
            a_table = str(a_table)
            if "企业基本信息" in a_table:
                res_year_dict["baseInfo"] = table.report_index("企业基本信息", a_table)
                # Defaults, flipped to "是" below when the matching
                # JSON sections turn out to be non-empty.
                res_year_dict["baseInfo"]["haveWebsite"] = "否"
                res_year_dict["baseInfo"]["buyEquity"] = "否"
                res_year_dict["baseInfo"]["equityTransfer"] = "否"
                break

        # Asset-status section
        res_year_dict["assetsInfo"] = {}   # reset so the shared template is never mutated
        for a_table in raw_year_base_table_s:
            a_table = str(a_table)
            if "企业资产状况信息" in a_table:
                res_year_dict["assetsInfo"] = table.report_index("企业资产状况信息", a_table)
                break

        # Website section (JSON)
        raw_year_web_html = a_raw_year_item.get("web", [])
        res_year_dict["website"] = {}   # reset so the shared template is never mutated
        for item in raw_year_web_html:
            web_data_list = json.loads(item).get("pagination", {}).get("dataList", [])
            if web_data_list:
                # Only the first website entry is kept.
                website = FO.transform_dict(TE.website_dict, TR.website_dict, web_data_list[0])
                if website:
                    website['type'] = '网站'
                res_year_dict["website"] = website
                res_year_dict["baseInfo"]["haveWebsite"] = "是"

        # Shareholder & contribution section (JSON)
        raw_year_share_html = a_raw_year_item.get("share", [])
        res_year_share_list = []
        for item in raw_year_share_html:
            share_data_list = json.loads(item).get("pagination", {}).get("dataList", [])
            for a_share_data in share_data_list:
                investorInformations = FO.transform_dict(TE.investorInformations_dict, TR.investorInformations_dict,
                                                         a_share_data)
                # Dates come as {"time": <ms epoch>} objects (or null).
                investorInformations["subConDate"] = num_to_time(
                    (a_share_data.get("conInfoPayDate", {}) or {}).get("time", 0)) or ''
                investorInformations["paidTime"] = num_to_time(
                    (a_share_data.get("conInfoActDate", {}) or {}).get("time", 0)) or ''

                investorInformations["subConType"] = num_str_to_type(a_share_data.get("conInfoInvForm", ""))
                investorInformations["paidType"] = num_str_to_type(a_share_data.get("conInfoActForm", ""))

                res_year_share_list.append(investorInformations)

        res_year_dict["investorInformations"] = res_year_share_list

        # Outbound-investment section (JSON)
        raw_year_invest_html = a_raw_year_item.get("invest", [])
        raw_year_invest_list = []
        for item in raw_year_invest_html:
            invest_data_list = json.loads(item).get("pagination", {}).get("dataList", [])
            for a_invest_data in invest_data_list:
                entinvItem = FO.transform_dict(TE.entinvItem_dict, TR.entinvItem_dict, a_invest_data)
                raw_year_invest_list.append(entinvItem)
                res_year_dict["baseInfo"]["buyEquity"] = "是"

        res_year_dict["entinvItem"] = raw_year_invest_list


        # External-guarantee section
        # NOTE(review): fetched but never parsed into the result.
        raw_year_guarantee_html = a_raw_year_item.get("guarantee", [])

        # Equity-change section (JSON)
        # TODO
        # NOTE(review): raw_year_stock_list is built but never stored in
        # res_year_dict; only the equityTransfer flag is kept.
        raw_year_stock_html = a_raw_year_item.get("stock", [])
        raw_year_stock_list = []
        for item in raw_year_stock_html:
            stock_data_list = json.loads(item).get("pagination", {}).get("dataList", [])
            # print json.dumps(stock_data_list, ensure_ascii=False, indent=4)
            for a_stock_data in stock_data_list:
                equityChange = FO.transform_dict(TE.equityChangeInformations_dict, TR.equityChangeInformations_dict,
                                                 a_stock_data)
                raw_year_stock_list.append(equityChange)
                res_year_dict["baseInfo"]["equityTransfer"] = "是"

        # Modification-history section (JSON)
        raw_year_modify_html = a_raw_year_item.get("modify", [])
        raw_year_modify_list = []
        for item in raw_year_modify_html:
            modify_data_list = json.loads(item).get("pagination", {}).get("dataList", [])
            for a_modify_data in modify_data_list:
                changeRecords = FO.transform_dict(TE.changeRecords_dict, TR.changeRecords_dict, a_modify_data)
                changeRecords["time"] = num_to_time((a_modify_data.get("modDate", {}) or {}).get("time", 0)) or ''
                raw_year_modify_list.append(changeRecords)

        res_year_dict["changeRecords"] = raw_year_modify_list

        # (post-processing / completion hook)

        res_year_list.append(res_year_dict)

    return res_year_list


def search2(companyName, MAXTIME=40):
    # res = ''
    res = 'error'
    asic_dict = {}
    # MAXTIME = 20
    a_time = MAXTIME

    exception_num = 0
    error_num = -1
    test_try_num = -1

    while a_time > 0:
        # print res, '*'*20
        test_try_num += 1
        if res is None:  # 公司不存在
            return None
            # return None, None, None, exception_num, error_num, test_try_num

        # 统计错误率
        elif res == 'exception':  # 验证码api异常
            exception_num += 1
            if a_time < MAXTIME:
                logger.error("重复破解验证码!当前设定重复破解次数为:%s, 剩余次数为:%s " % (MAXTIME, a_time))
            a_time -= 1
            try:
                res = download_captcha_kill(companyName)
            except Exception, e:
                traceback.print_exc(e)
                raise e

        elif res == 'error':  # 验证码错误
            error_num += 1
            if a_time < MAXTIME:
                logger.error("重复破解验证码!当前设定重复破解次数为:%s, 剩余次数为:%s " % (MAXTIME, a_time))
            a_time -= 1
            try:
                res = download_captcha_kill(companyName)
            except Exception, e:
                traceback.print_exc(e)
                raise e

        elif res == 're download':
            res = download_captcha_kill(companyName)
            test_try_num -= 1
        else:
            break

            # 原代码
            # elif res == '':     # 验证码错误
            #     if a_time < MAXTIME:
            #         logger.error("重复破解验证码!当前设定重复破解次数为:%s, 剩余次数为:%s " % (MAXTIME, a_time))
            #     a_time -= 1
            #     try:
            #         # time.sleep(10)
            #         res = download_captcha_kill(companyName)
            #         # print res
            #     except Exception, e:
            #         traceback.print_exc(e)
            #         raise e
            # else:
            #     break
            #
            #
    com_list = res
    res = get_company_info(com_list)
    if a_time <= 1 and res == '':
        raise Exception("多次破解验证码错误,当前设置次数为：%s" % MAXTIME)
    else:
        raw_dict = res
        try:
            asic_dict = extract_base_info(raw_dict)
            year_list = extract_year_info(raw_dict)
            company_name = asic_dict['basicList'][0].get('enterpriseName', '')
            company_name = company_name if company_name else companyName
            res['companyName'] = company_name
            asic_dict['yearReportList'] = year_list
            gate_method = {
                'url': 'http://gsxt.zjaic.gov.cn/',
                'method': 'get',
                'province': 'zj',
                'companyName': company_name,
                'data': com_list,
            }

            return res, asic_dict, gate_method
            # return res, asic_dict, gate_method, exception_num, error_num, test_try_num
        except Exception, e:
            logger.info(e)
            res['companyName'] = companyName
            gate_method = {
                'url': 'http://gsxt.zjaic.gov.cn/',
                'method': 'get',
                'province': 'zj',
                'companyName': companyName,
                'data': com_list,
            }
            # raise Exception(e)
            return res, None, gate_method
            # return res, None, gate_method, exception_num, error_num, test_try_num


def search(companyName):
    """Convenience wrapper around search2().

    Returns None when the company does not exist, otherwise only the
    parsed asic_dict element of the search2() result tuple.
    """
    full_result = search2(companyName)
    return full_result[1] if full_result else None


def search3(gate_method):
    if 'data' not in gate_method:
        raise Exception("gate_method error, doesn't have `data` key")
    com_list = gate_method.get('data')
    res = get_company_info(com_list)
    companyName = gate_method.get('companyName', '')

    raw_dict = res
    try:
        asic_dict = extract_base_info(raw_dict)
        year_list = extract_year_info(raw_dict)
        res['companyName'] = asic_dict['basicList'][0].get('enterpriseName', '')

        asic_dict['yearReportList'] = year_list
        gate_method = {
            'url': 'http://gsxt.zjaic.gov.cn/',
            'method': 'get',
            'province': 'zj',
            'companyName': asic_dict['basicList'][0].get('enterpriseName', ''),
            'data': com_list,
        }

        return res, asic_dict, gate_method
    except Exception, e:
        logger.info(e)
        res['companyName'] = companyName
        gate_method = {
            'url': 'http://gsxt.zjaic.gov.cn/',
            'method': 'get',
            'province': 'zj',
            'companyName': companyName,
            'data': com_list,
        }
        # raise Exception(e)
        return res, None, gate_method


if __name__ == "__main__":
    # companyName = '杭州微光电子股份有限公司'
    # companyName = '阿里巴巴（中国）网络技术有限公司'
    # companyName = '淘宝网'

    # 主要人员翻页,变更翻页
    # companyName = '浙江康盛股份有限公司'
    # companyName = '杭州巨星科技股份有限公司'

    # 主要人员翻页,变更翻页,股东翻页
    # 杭州微光电子股份有限公司

    # # 经营异常
    # companyName = '杭州玮希通信器材有限公司'
    # companyName = '湖州织里范凎明制衣厂'
    # companyName = '宁波禄鼎金融服务外包合伙企业（有限合伙）'

    # 抽查检查
    # companyName = '浙江周大鲜海洋食品股份有限公司'

    # companyName = '杭州市拱墅区供销合作总社'

    # companyName = '杭州誉存科技有限公司'
    # companyName = '庄吉集团有限公司'
    companyName = '农夫山泉股份有限公司'

    # companyName = '阿里巴巴中国'

    # 个体
    # 三门县蚂蚁机械厂
    # 三门县小蚂蚁汽车租赁中介服务部


    # download_captcha_kill(companyName)

    res = search2(companyName)
    print json.dumps(res, ensure_ascii=False, indent=4)

    # get_check()
